{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "febec89f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5cb42221",
   "metadata": {},
   "outputs": [],
   "source": [
    "def dialogue_data_process(example, task_name, idx):\n",
    "    \"\"\"Convert one dialogue record into the unified example format.\n",
    "\n",
    "    The instruction is the record's knowledge text and dialogue history\n",
    "    joined by a newline; the right and hallucinated responses become\n",
    "    single-element positive / negative answer lists.\n",
    "\n",
    "    Sample input record:\n",
    "    {\n",
    "        \"knowledge\": \"Fight Club is starring Jared Leto. Jared Leto starred in Panic Room\",\n",
    "        \"dialogue_history\": \"[Human]: Could you recommend some movies similar to Fight Club? \",\n",
    "        \"right_response\": \"Sure, Panic Room is a similar movie\",\n",
    "        \"hallucinated_response\": \"Sure, Mr. Nobody is a similar movie.\"\n",
    "    }\n",
    "\n",
    "    Returns a dict with keys task_name, idx, instruction, answer_positive\n",
    "    and answer_negative (in that order).\n",
    "    \"\"\"\n",
    "    knowledge = example[\"knowledge\"]\n",
    "    history = example[\"dialogue_history\"]\n",
    "    record = dict(task_name=task_name, idx=idx)\n",
    "    record[\"instruction\"] = f\"{knowledge}\\n{history}\"\n",
    "    record[\"answer_positive\"] = [example[\"right_response\"]]\n",
    "    record[\"answer_negative\"] = [example[\"hallucinated_response\"]]\n",
    "    return record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "35a0d1ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "def general_data_process(example, task_name, idx):\n",
    "    \"\"\"Convert one general-data record into the unified example format.\n",
    "\n",
    "    Sample input record:\n",
    "    {\n",
    "        \"ID\": \"10\",\n",
    "        \"user_query\": \"Create a design for the given text snippet.\\nUnited We Stand\",\n",
    "        \"chatgpt_response\": \"| U | N | I | T | E | D |\\n| W | E |\\n| S | T | A | N | D |\",\n",
    "        \"hallucination\": \"no\",\n",
    "        \"hallucination_spans\": []\n",
    "    }\n",
    "\n",
    "    Unlike the other tasks, a record carries a single response plus a\n",
    "    \"hallucination\" label for it (\"no\" in the sample above; presumably\n",
    "    \"yes\" otherwise -- confirm against the data file). The label is not an\n",
    "    answer text, so it is never emitted as an answer: the response goes to\n",
    "    answer_negative when the label is \"yes\" and to answer_positive\n",
    "    otherwise, and the other list is left empty.\n",
    "\n",
    "    Returns a dict with keys task_name, idx, instruction, answer_positive\n",
    "    and answer_negative.\n",
    "    \"\"\"\n",
    "    response = example[\"chatgpt_response\"]\n",
    "    # Normalise the label so \"Yes\" / \" yes\" are treated like \"yes\".\n",
    "    is_hallucinated = str(example[\"hallucination\"]).strip().lower() == \"yes\"\n",
    "    return {\n",
    "        \"task_name\": task_name, \n",
    "        \"idx\": idx,\n",
    "        \"instruction\": \"{}\".format(example[\"user_query\"]),\n",
    "        \"answer_positive\": [] if is_hallucinated else [response],\n",
    "        \"answer_negative\": [response] if is_hallucinated else [],\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "fee1d8b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def qa_data_process(example, task_name, idx):\n",
    "    \"\"\"Convert one QA record into the unified example format.\n",
    "\n",
    "    The instruction is the knowledge passage followed by the question on a\n",
    "    new line; the right and hallucinated answers become single-element\n",
    "    positive / negative answer lists.\n",
    "\n",
    "    Sample input record:\n",
    "    {\n",
    "        \"knowledge\": \" Donahue replaced Kelli Ward who resigned to run for the United States Senate.Kelli Ward (\\\"née\\\" Kaznoski; born January 25, 1969) is an American politician and osteopathic physician.\",\n",
    "        \"question\": \"Which  American politician did Donahue replaced \",\n",
    "        \"right_answer\": \"Kelli Ward\",\n",
    "        \"hallucinated_answer\": \"Donahue replaced Kelli Ward's position.\"\n",
    "    }\n",
    "    \"\"\"\n",
    "    context, question = example[\"knowledge\"], example[\"question\"]\n",
    "    correct, hallucinated = example[\"right_answer\"], example[\"hallucinated_answer\"]\n",
    "    return dict(\n",
    "        task_name=task_name,\n",
    "        idx=idx,\n",
    "        instruction=f\"{context}\\n{question}\",\n",
    "        answer_positive=[correct],\n",
    "        answer_negative=[hallucinated],\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7d329e77",
   "metadata": {},
   "outputs": [],
   "source": [
    "def summarization_data_process(example, task_name, idx):\n",
    "    \"\"\"Convert one summarization record into the unified example format.\n",
    "\n",
    "    Reads three keys from ``example``:\n",
    "        \"document\"              -- text to be summarized\n",
    "        \"right_summary\"         -- emitted as the single positive answer\n",
    "        \"hallucinated_summary\"  -- emitted as the single negative answer\n",
    "\n",
    "    The instruction wraps the document in a fixed prompt that asks for a\n",
    "    summary.\n",
    "    \"\"\"\n",
    "    template = \"Document: {}\\nQ: {}\"\n",
    "    request = \"Please summarize the document.\"\n",
    "    result = {\"task_name\": task_name, \"idx\": idx}\n",
    "    result.update(\n",
    "        instruction=template.format(example[\"document\"], request),\n",
    "        answer_positive=[example[\"right_summary\"]],\n",
    "        answer_negative=[example[\"hallucinated_summary\"]],\n",
    "    )\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d5089e60",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Task name -> converter for that task's records. The task name is also the\n",
    "# stem of the \"<task_name>.json\" file the loading cell opens.\n",
    "task_name_list = dict(\n",
    "    dialogue_data=dialogue_data_process,\n",
    "    general_data=general_data_process,\n",
    "    qa_data=qa_data_process,\n",
    "    summarization_data=summarization_data_process,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e893769e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████| 10000/10000 [00:00<00:00, 833625.63it/s]\n",
      "100%|██████████████████████████████████| 4507/4507 [00:00<00:00, 1167617.55it/s]\n",
      "100%|█████████████████████████████████| 10000/10000 [00:00<00:00, 836251.69it/s]\n",
      "100%|█████████████████████████████████| 10000/10000 [00:00<00:00, 226407.06it/s]\n"
     ]
    }
   ],
   "source": [
    "# Start from an empty list so re-running this cell does not append duplicates.\n",
    "all_data = list()\n",
    "\n",
    "for task_name, processor in task_name_list.items():\n",
    "    with open(\"{}.json\".format(task_name), \"r\", encoding=\"utf-8\") as fr:\n",
    "        # One JSON object per line. Blank lines (e.g. a trailing newline at\n",
    "        # end of file) are skipped because json.loads would raise on them.\n",
    "        examples = [json.loads(line) for line in fr if line.strip()]\n",
    "    # The file is closed once parsed; idx is 1-based within each task.\n",
    "    for ei, example in enumerate(tqdm(examples), start=1):\n",
    "        all_data.append(processor(example, task_name, ei))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "70db1fcf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Despite the .json extension the output is JSON Lines: one record per line.\n",
    "with open(\"halueval_dataset.json\", \"w\", encoding=\"utf-8\") as fw:\n",
    "    for example in all_data:\n",
    "        print(json.dumps(example), file=fw)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
